import pandas as pd—
title: “Summer Program: Python Programming _ Day2 Quiz” author: “김보람” date: “07/25/2023”
lesson 7
1
from IPython.core.display import HTML
HTML('<table border="1" class="dataframe">\n <thead>\n <tr style="text-align: right;">\n <th></th>\n <th>A</th>\n <th>B</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>-2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>-3</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>-4</td>\n </tr>\n </tbody>\n</table>')| A | B | |
|---|---|---|
| 0 | 1 | -2 |
| 1 | 2 | -3 |
| 2 | 3 | -4 |
df = pd.DataFrame({'A':[1,2,3],'B':[-2,-3,-4]})
df| A | B | |
|---|---|---|
| 0 | 1 | -2 |
| 1 | 2 | -3 |
| 2 | 3 | -4 |
2
df.columns = ['X1','X2']
df| X1 | X2 | |
|---|---|---|
| 0 | 1 | -2 |
| 1 | 2 | -3 |
| 2 | 3 | -4 |
3
df = pd.DataFrame(np.random.normal(size=(100,5)),columns=list('ABCDE'))
df| A | B | C | D | E | |
|---|---|---|---|---|---|
| 0 | 1.463910 | -0.189044 | -1.154230 | -1.063342 | 1.027457 |
| 1 | 0.821139 | -0.511394 | -3.710588 | 1.160104 | -0.107635 |
| 2 | -0.732671 | -1.033050 | -0.295740 | 0.282198 | -0.215732 |
| 3 | 1.598415 | -2.190977 | -0.803828 | -0.956396 | -0.174917 |
| 4 | -0.185002 | 1.154373 | -0.236004 | -2.128459 | -1.623930 |
| ... | ... | ... | ... | ... | ... |
| 95 | -1.085273 | 0.428166 | 0.744181 | 0.533127 | -0.263671 |
| 96 | -0.528071 | 1.460818 | -0.773037 | -0.355642 | 1.661265 |
| 97 | -0.044476 | -0.269066 | 0.018526 | -0.318183 | 0.990264 |
| 98 | -0.324435 | 0.583649 | 1.500259 | 0.257866 | -2.036244 |
| 99 | 2.104528 | 0.380031 | -0.353625 | -1.402250 | -1.109209 |
100 rows × 5 columns
df[['B','D']]| B | D | |
|---|---|---|
| 0 | -0.189044 | -1.063342 |
| 1 | -0.511394 | 1.160104 |
| 2 | -1.033050 | 0.282198 |
| 3 | -2.190977 | -0.956396 |
| 4 | 1.154373 | -2.128459 |
| ... | ... | ... |
| 95 | 0.428166 | 0.533127 |
| 96 | 1.460818 | -0.355642 |
| 97 | -0.269066 | -0.318183 |
| 98 | 0.583649 | 0.257866 |
| 99 | 0.380031 | -1.402250 |
100 rows × 2 columns
4
df[-10:]| A | B | C | D | E | |
|---|---|---|---|---|---|
| 90 | -0.167761 | -0.664358 | 1.867122 | -0.177171 | 0.197525 |
| 91 | 0.377890 | -1.048871 | 2.145238 | 0.424379 | 0.525715 |
| 92 | 0.696097 | -0.948450 | 0.611477 | 1.591229 | 0.091939 |
| 93 | -0.419354 | 1.160804 | -0.993989 | 0.987491 | -1.580077 |
| 94 | 1.178698 | -0.141507 | -0.717452 | 1.447968 | 0.585297 |
| 95 | -1.085273 | 0.428166 | 0.744181 | 0.533127 | -0.263671 |
| 96 | -0.528071 | 1.460818 | -0.773037 | -0.355642 | 1.661265 |
| 97 | -0.044476 | -0.269066 | 0.018526 | -0.318183 | 0.990264 |
| 98 | -0.324435 | 0.583649 | 1.500259 | 0.257866 | -2.036244 |
| 99 | 2.104528 | 0.380031 | -0.353625 | -1.402250 | -1.109209 |
5
df[['A','B']][:10]| A | B | |
|---|---|---|
| 0 | 1.463910 | -0.189044 |
| 1 | 0.821139 | -0.511394 |
| 2 | -0.732671 | -1.033050 |
| 3 | 1.598415 | -2.190977 |
| 4 | -0.185002 | 1.154373 |
| 5 | 1.410994 | -1.352321 |
| 6 | 0.547868 | 0.647899 |
| 7 | -2.993996 | -0.656354 |
| 8 | 0.016301 | 0.654041 |
| 9 | -0.973628 | 0.756796 |
df.loc[:,['A','B']].iloc[:10]| A | B | |
|---|---|---|
| 0 | 1.463910 | -0.189044 |
| 1 | 0.821139 | -0.511394 |
| 2 | -0.732671 | -1.033050 |
| 3 | 1.598415 | -2.190977 |
| 4 | -0.185002 | 1.154373 |
| 5 | 1.410994 | -1.352321 |
| 6 | 0.547868 | 0.647899 |
| 7 | -2.993996 | -0.656354 |
| 8 | 0.016301 | 0.654041 |
| 9 | -0.973628 | 0.756796 |
6
df=pd.read_csv('https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/movie.csv')
df| color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Color | James Cameron | 723.0 | 178.0 | 0.0 | 855.0 | Joel David Moore | 1000.0 | 760505847.0 | Action|Adventure|Fantasy|Sci-Fi | ... | 3054.0 | English | USA | PG-13 | 237000000.0 | 2009.0 | 936.0 | 7.9 | 1.78 | 33000 |
| 1 | Color | Gore Verbinski | 302.0 | 169.0 | 563.0 | 1000.0 | Orlando Bloom | 40000.0 | 309404152.0 | Action|Adventure|Fantasy | ... | 1238.0 | English | USA | PG-13 | 300000000.0 | 2007.0 | 5000.0 | 7.1 | 2.35 | 0 |
| 2 | Color | Sam Mendes | 602.0 | 148.0 | 0.0 | 161.0 | Rory Kinnear | 11000.0 | 200074175.0 | Action|Adventure|Thriller | ... | 994.0 | English | UK | PG-13 | 245000000.0 | 2015.0 | 393.0 | 6.8 | 2.35 | 85000 |
| 3 | Color | Christopher Nolan | 813.0 | 164.0 | 22000.0 | 23000.0 | Christian Bale | 27000.0 | 448130642.0 | Action|Thriller | ... | 2701.0 | English | USA | PG-13 | 250000000.0 | 2012.0 | 23000.0 | 8.5 | 2.35 | 164000 |
| 4 | NaN | Doug Walker | NaN | NaN | 131.0 | NaN | Rob Walker | 131.0 | NaN | Documentary | ... | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 7.1 | NaN | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4911 | Color | Scott Smith | 1.0 | 87.0 | 2.0 | 318.0 | Daphne Zuniga | 637.0 | NaN | Comedy|Drama | ... | 6.0 | English | Canada | NaN | NaN | 2013.0 | 470.0 | 7.7 | NaN | 84 |
| 4912 | Color | NaN | 43.0 | 43.0 | NaN | 319.0 | Valorie Curry | 841.0 | NaN | Crime|Drama|Mystery|Thriller | ... | 359.0 | English | USA | TV-14 | NaN | NaN | 593.0 | 7.5 | 16.00 | 32000 |
| 4913 | Color | Benjamin Roberds | 13.0 | 76.0 | 0.0 | 0.0 | Maxwell Moody | 0.0 | NaN | Drama|Horror|Thriller | ... | 3.0 | English | USA | NaN | 1400.0 | 2013.0 | 0.0 | 6.3 | NaN | 16 |
| 4914 | Color | Daniel Hsia | 14.0 | 100.0 | 0.0 | 489.0 | Daniel Henney | 946.0 | 10443.0 | Comedy|Drama|Romance | ... | 9.0 | English | USA | PG-13 | NaN | 2012.0 | 719.0 | 6.3 | 2.35 | 660 |
| 4915 | Color | Jon Gunn | 43.0 | 90.0 | 16.0 | 16.0 | Brian Herzlinger | 86.0 | 85222.0 | Documentary | ... | 84.0 | English | USA | PG | 1100.0 | 2004.0 | 23.0 | 6.6 | 1.85 | 456 |
4916 rows × 28 columns
len(df.columns)28
7
index = df.columns
indexIndex(['color', 'director_name', 'num_critic_for_reviews', 'duration',
'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
dtype='object')
[l for l in index if l[0]=='c' or l[0]=='d']['color',
'director_name',
'duration',
'director_facebook_likes',
'cast_total_facebook_likes',
'country',
'content_rating']
len([l for l in index if l[0]=='c' or l[0]=='d'])7
8
[l for l in df.columns if 'actor' in l]['actor_3_facebook_likes',
'actor_2_name',
'actor_1_facebook_likes',
'actor_1_name',
'actor_3_name',
'actor_2_facebook_likes']
len([l for l in df.columns if 'actor' in l])6
9
df.loc[:, ['actor' in l for l in df.columns]]| actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | actor_1_name | actor_3_name | actor_2_facebook_likes | |
|---|---|---|---|---|---|---|
| 0 | 855.0 | Joel David Moore | 1000.0 | CCH Pounder | Wes Studi | 936.0 |
| 1 | 1000.0 | Orlando Bloom | 40000.0 | Johnny Depp | Jack Davenport | 5000.0 |
| 2 | 161.0 | Rory Kinnear | 11000.0 | Christoph Waltz | Stephanie Sigman | 393.0 |
| 3 | 23000.0 | Christian Bale | 27000.0 | Tom Hardy | Joseph Gordon-Levitt | 23000.0 |
| 4 | NaN | Rob Walker | 131.0 | Doug Walker | NaN | 12.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 4911 | 318.0 | Daphne Zuniga | 637.0 | Eric Mabius | Crystal Lowe | 470.0 |
| 4912 | 319.0 | Valorie Curry | 841.0 | Natalie Zea | Sam Underwood | 593.0 |
| 4913 | 0.0 | Maxwell Moody | 0.0 | Eva Boehnke | David Chandler | 0.0 |
| 4914 | 489.0 | Daniel Henney | 946.0 | Alan Ruck | Eliza Coupe | 719.0 |
| 4915 | 16.0 | Brian Herzlinger | 86.0 | John August | Jon Gunn | 23.0 |
4916 rows × 6 columns
lesson 9
np.random.seed(20230426)
day = ['2023-04-24(Mon)','2023-04-25(Tue)','2023-04-26(Wed)','2023-04-27(Thu)','2023-04-28(Fri)',
'2023-05-01(Mon)','2023-05-02(Tue)','2023-05-03(Wed)','2023-05-04(Thu)','2023-05-05(Fri)',
'2023-05-08(Mon)','2023-05-09(Tue)','2023-05-10(Wed)','2023-05-11(Thu)','2023-05-12(Fri)']
hours1 = np.random.randn(15).cumsum()*2
hours1 = hours1 - hours1.min() +1
hours2 = np.random.randn(15).cumsum()*2
hours2 = hours2 - hours2.min() +1
df = pd.DataFrame({'hours(R)':hours1, 'hours(Python)':hours2},index=day)
df| hours(R) | hours(Python) | |
|---|---|---|
| 2023-04-24(Mon) | 11.064829 | 9.254671 |
| 2023-04-25(Tue) | 9.790750 | 7.327548 |
| 2023-04-26(Wed) | 5.993362 | 9.185495 |
| 2023-04-27(Thu) | 7.542498 | 12.525569 |
| 2023-04-28(Fri) | 8.598600 | 10.906909 |
| 2023-05-01(Mon) | 6.933549 | 9.865538 |
| 2023-05-02(Tue) | 6.456987 | 11.081043 |
| 2023-05-03(Wed) | 4.976548 | 10.240239 |
| 2023-05-04(Thu) | 6.021139 | 5.822405 |
| 2023-05-05(Fri) | 1.851839 | 5.522484 |
| 2023-05-08(Mon) | 1.000000 | 4.319094 |
| 2023-05-09(Tue) | 1.350073 | 1.000000 |
| 2023-05-10(Wed) | 3.138700 | 2.633662 |
| 2023-05-11(Thu) | 3.153756 | 4.870860 |
| 2023-05-12(Fri) | 1.353976 | 1.785441 |
1
index = [l.replace(')','').split('(') for l in df.index]index[['2023-04-24', 'Mon'],
['2023-04-25', 'Tue'],
['2023-04-26', 'Wed'],
['2023-04-27', 'Thu'],
['2023-04-28', 'Fri'],
['2023-05-01', 'Mon'],
['2023-05-02', 'Tue'],
['2023-05-03', 'Wed'],
['2023-05-04', 'Thu'],
['2023-05-05', 'Fri'],
['2023-05-08', 'Mon'],
['2023-05-09', 'Tue'],
['2023-05-10', 'Wed'],
['2023-05-11', 'Thu'],
['2023-05-12', 'Fri']]
df.assign(day = [day for day, _ in index], weekday = [weekday for _, weekday in index])| hours(R) | hours(Python) | day | weekday | |
|---|---|---|---|---|
| 2023-04-24(Mon) | 11.064829 | 9.254671 | 2023-04-24 | Mon |
| 2023-04-25(Tue) | 9.790750 | 7.327548 | 2023-04-25 | Tue |
| 2023-04-26(Wed) | 5.993362 | 9.185495 | 2023-04-26 | Wed |
| 2023-04-27(Thu) | 7.542498 | 12.525569 | 2023-04-27 | Thu |
| 2023-04-28(Fri) | 8.598600 | 10.906909 | 2023-04-28 | Fri |
| 2023-05-01(Mon) | 6.933549 | 9.865538 | 2023-05-01 | Mon |
| 2023-05-02(Tue) | 6.456987 | 11.081043 | 2023-05-02 | Tue |
| 2023-05-03(Wed) | 4.976548 | 10.240239 | 2023-05-03 | Wed |
| 2023-05-04(Thu) | 6.021139 | 5.822405 | 2023-05-04 | Thu |
| 2023-05-05(Fri) | 1.851839 | 5.522484 | 2023-05-05 | Fri |
| 2023-05-08(Mon) | 1.000000 | 4.319094 | 2023-05-08 | Mon |
| 2023-05-09(Tue) | 1.350073 | 1.000000 | 2023-05-09 | Tue |
| 2023-05-10(Wed) | 3.138700 | 2.633662 | 2023-05-10 | Wed |
| 2023-05-11(Thu) | 3.153756 | 4.870860 | 2023-05-11 | Thu |
| 2023-05-12(Fri) | 1.353976 | 1.785441 | 2023-05-12 | Fri |
df = df.assign(day = [day for day, _ in index], weekday = [weekday for _, weekday in index]).reset_index().loc[:,'hours(R)':]
df| hours(R) | hours(Python) | day | weekday | |
|---|---|---|---|---|
| 0 | 11.064829 | 9.254671 | 2023-04-24 | Mon |
| 1 | 9.790750 | 7.327548 | 2023-04-25 | Tue |
| 2 | 5.993362 | 9.185495 | 2023-04-26 | Wed |
| 3 | 7.542498 | 12.525569 | 2023-04-27 | Thu |
| 4 | 8.598600 | 10.906909 | 2023-04-28 | Fri |
| 5 | 6.933549 | 9.865538 | 2023-05-01 | Mon |
| 6 | 6.456987 | 11.081043 | 2023-05-02 | Tue |
| 7 | 4.976548 | 10.240239 | 2023-05-03 | Wed |
| 8 | 6.021139 | 5.822405 | 2023-05-04 | Thu |
| 9 | 1.851839 | 5.522484 | 2023-05-05 | Fri |
| 10 | 1.000000 | 4.319094 | 2023-05-08 | Mon |
| 11 | 1.350073 | 1.000000 | 2023-05-09 | Tue |
| 12 | 3.138700 | 2.633662 | 2023-05-10 | Wed |
| 13 | 3.153756 | 4.870860 | 2023-05-11 | Thu |
| 14 | 1.353976 | 1.785441 | 2023-05-12 | Fri |
2
df.query('day < "2023-05-01"')['hours(R)'].sum()42.99003889835529
df.query('day < "2023-05-01"')['hours(Python)'].sum()49.20019054928582
- python
3
{s:df.loc[df.weekday == s, 'hours(R)'].sum() for s in set(df.weekday)}{'Thu': 16.717393020928853,
'Mon': 18.99837797631909,
'Wed': 14.10860912003022,
'Fri': 11.804415159359687,
'Tue': 17.597810683605076}
4
{s:df.loc[df.weekday == s, 'hours(R)'].sum() for s in set(df.weekday)}{'Thu': 16.717393020928853,
'Mon': 18.99837797631909,
'Wed': 14.10860912003022,
'Fri': 11.804415159359687,
'Tue': 17.597810683605076}
{s:df.loc[df.weekday == s, 'hours(Python)'].sum() for s in set(df.weekday)}{'Thu': 23.21883427607518,
'Mon': 23.43930191418699,
'Wed': 22.05939511920843,
'Fri': 18.214832977059253,
'Tue': 19.408591083196185}
{s:df.loc[df.weekday == s,'hours(R)',].sum()+df.loc[df.weekday == s,'hours(Python)'].sum() for s in set(df.weekday)}{'Thu': 39.93622729700404,
'Mon': 42.43767989050608,
'Wed': 36.16800423923865,
'Fri': 30.019248136418938,
'Tue': 37.00640176680126}
5
import matplotlib.pyplot as plty = df['hours(R)'] - df['hours(Python)']
plt.plot(y,'--o')
lesson 10
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
df.head(n=3)| ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189cm | 82kg | €157M | 8.0 | NaN |
| 1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179cm | 69kg | €155M | 8.0 | NaN |
| 2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172cm | 69kg | €97.7M | 19.0 | NaN |
3 rows × 29 columns
1
df.Position0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
df.loc[:,'Position']0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
s = df["Position"]
s0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
2
s = s.dropna()
s0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17625, dtype: object
'<span class="pos pos28">SUB'
3
position_dict = {
'GOALKEEPER':['GK'],
'DEFENDER':['CB','RCB','LCB','RB','LB','RWB','LWB'],
'MIDFIELDER':['CM','RCM','LCM','CDM','RDM','LDM','CAM','RAM','LAM','RM','LM'],
'FORWARD':['ST','CF','RF','LF','RW','LW','RS','LS'],
'SUB':['SUB'],
'RES':['RES']
}lst = list(map(lambda x: x.split('>')[-1],s))s.apply(lambda x: x.split('>')[-1])0 SUB
1 LCM
2 LB
3 RCM
4 RCM
...
17655 RES
17656 RES
17657 RES
17658 RES
17659 RES
Name: Position, Length: 17625, dtype: object
[(k,v) for k,v in position_dict.items()][('GOALKEEPER', ['GK']),
('DEFENDER', ['CB', 'RCB', 'LCB', 'RB', 'LB', 'RWB', 'LWB']),
('MIDFIELDER',
['CM', 'RCM', 'LCM', 'CDM', 'RDM', 'LDM', 'CAM', 'RAM', 'LAM', 'RM', 'LM']),
('FORWARD', ['ST', 'CF', 'RF', 'LF', 'RW', 'LW', 'RS', 'LS']),
('SUB', ['SUB']),
('RES', ['RES'])]
[k for l in s.apply(lambda x: x.split('>')[-1]) for k,v in position_dict.items() if l in v]['SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'RES',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'RES',
'SUB',
'DEFENDER',
'FORWARD',
'DEFENDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'RES',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'FORWARD',
'FORWARD',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'FORWARD',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'RES',
'RES',
'FORWARD',
'SUB',
'FORWARD',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'RES',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'RES',
'SUB',
'SUB',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'RES',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'RES',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'FORWARD',
'SUB',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'RES',
'FORWARD',
'RES',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'SUB',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'RES',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'DEFENDER',
'RES',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'RES',
'SUB',
'FORWARD',
'RES',
'RES',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'FORWARD',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'RES',
'FORWARD',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'DEFENDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'RES',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'RES',
'FORWARD',
'FORWARD',
'DEFENDER',
'RES',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'RES',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
...]
# list(map(lambda l : [k for l in s for k,v in position_dict.items() if l in v],s))4
df.Age0 27
1 27
2 30
3 31
4 25
..
17655 19
17656 17
17657 25
17658 18
17659 20
Name: Age, Length: 17660, dtype: int64
df.Age.mean()23.127746319365798
list(map(lambda x: 'OB' if x>df.Age.mean() else 'YB', df.Age))['OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
...]
or
df.Age.apply(5
BMI = 키 / 몸무게^2
df.Height.apply( x[:-2])df.Height0 189cm
1 179cm
2 172cm
3 181cm
4 172cm
...
17655 190cm
17656 195cm
17657 190cm
17658 187cm
17659 186cm
Name: Height, Length: 17660, dtype: object
df.Weight0 82kg
1 69kg
2 69kg
3 70kg
4 68kg
...
17655 78kg
17656 84kg
17657 82kg
17658 79kg
17659 78kg
Name: Weight, Length: 17660, dtype: object
_df = df.loc[:,'Height':'Weight'].applymap(lambda x: int(x[:-2])).eval('BMI=Height/Weight**2')
_df| Height | Weight | BMI | |
|---|---|---|---|
| 0 | 189 | 82 | 0.028108 |
| 1 | 179 | 69 | 0.037597 |
| 2 | 172 | 69 | 0.036127 |
| 3 | 181 | 70 | 0.036939 |
| 4 | 172 | 68 | 0.037197 |
| ... | ... | ... | ... |
| 17655 | 190 | 78 | 0.031229 |
| 17656 | 195 | 84 | 0.027636 |
| 17657 | 190 | 82 | 0.028257 |
| 17658 | 187 | 79 | 0.029963 |
| 17659 | 186 | 78 | 0.030572 |
17660 rows × 3 columns
6
_df.BMI.mean()0.03343718444596221
list(map(lambda x: 'H' if x>_df.BMI.mean() else 'L', _df.BMI))['L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
...]
_df.BMI.apply(lambda x: 'H' if x>_df.BMI.mean() else 'L')0 L
1 H
2 H
3 H
4 H
..
17655 L
17656 L
17657 L
17658 L
17659 L
Name: BMI, Length: 17660, dtype: object
lesson 11
lesson
_category = ['A']*5+['B']*5
_value = np.concatenate([np.random.randn(5), np.random.randn(5)+10])
df = pd.DataFrame({'category':_category, 'value':_value})
df| category | value | |
|---|---|---|
| 0 | A | 1.086803 |
| 1 | A | 0.448341 |
| 2 | A | 0.575944 |
| 3 | A | 0.277854 |
| 4 | A | -1.924633 |
| 5 | B | 9.844358 |
| 6 | B | 9.819660 |
| 7 | B | 9.425310 |
| 8 | B | 10.191885 |
| 9 | B | 11.187810 |
df.groupby("category").mean()| value | |
|---|---|
| category | |
| A | 0.092862 |
| B | 10.093805 |
df.groupby("category").aggregate(np.mean)| value | |
|---|---|
| category | |
| A | 0.092862 |
| B | 10.093805 |
flights data
df=pd.read_csv('https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/flights.csv')
df.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58492 entries, 0 to 58491
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 MONTH 58492 non-null int64
1 DAY 58492 non-null int64
2 WEEKDAY 58492 non-null int64
3 AIRLINE 58492 non-null object
4 ORG_AIR 58492 non-null object
5 DEST_AIR 58492 non-null object
6 SCHED_DEP 58492 non-null int64
7 DEP_DELAY 57659 non-null float64
8 AIR_TIME 57474 non-null float64
9 DIST 58492 non-null int64
10 SCHED_ARR 58492 non-null int64
11 ARR_DELAY 57474 non-null float64
12 DIVERTED 58492 non-null int64
13 CANCELLED 58492 non-null int64
dtypes: float64(3), int64(8), object(3)
memory usage: 6.2+ MB
- 예제1: 항공사(AIRLINE)별로 도착지연시간의(ARR_DELAY)의 평균을 구하라.
df.groupby("AIRLINE")["ARR_DELAY"].mean()AIRLINE
AA 5.542661
AS -0.833333
B6 8.692593
DL 0.339691
EV 7.034580
F9 13.630651
HA 4.972973
MQ 6.860591
NK 18.436070
OO 7.593463
UA 7.765755
US 1.681105
VX 5.348884
WN 6.397353
Name: ARR_DELAY, dtype: float64
df.groupby("AIRLINE").aggregate({'ARR_DELAY':[np.mean,'count']})| ARR_DELAY | ||
|---|---|---|
| mean | count | |
| AIRLINE | ||
| AA | 5.542661 | 8720 |
| AS | -0.833333 | 768 |
| B6 | 8.692593 | 540 |
| DL | 0.339691 | 10539 |
| EV | 7.034580 | 5697 |
| F9 | 13.630651 | 1305 |
| HA | 4.972973 | 111 |
| MQ | 6.860591 | 3314 |
| NK | 18.436070 | 1486 |
| OO | 7.593463 | 6425 |
| UA | 7.765755 | 7680 |
| US | 1.681105 | 1593 |
| VX | 5.348884 | 986 |
| WN | 6.397353 | 8310 |
- 예제2: 항공사(AIRLINE)별로 비행취소건수(CANCELLED)의 합계를 구하라. 취소건수가 가장 높은 두개의 항공사는 어디인가?
df.groupby("AIRLINE").aggregate({'CANCELLED':'sum'}).sort_values(by="CANCELLED", ascending=False)| CANCELLED | |
|---|---|
| AIRLINE | |
| AA | 154 |
| MQ | 152 |
| EV | 146 |
| OO | 142 |
| UA | 93 |
| WN | 93 |
| DL | 38 |
| NK | 25 |
| US | 21 |
| F9 | 10 |
| VX | 6 |
| B6 | 1 |
| AS | 0 |
| HA | 0 |
- 예제3: 항공사(AIRLINE)별로 비행취소율(CANCELLED)을 구하라. 비행취소율이 가장 높은 두개의 항공사는 어디인가?
df.groupby("AIRLINE").aggregate({'CANCELLED':'mean'}).sort_values(by="CANCELLED", ascending=False)| CANCELLED | |
|---|---|
| AIRLINE | |
| MQ | 0.043791 |
| EV | 0.024923 |
| OO | 0.021554 |
| AA | 0.017303 |
| NK | 0.016491 |
| US | 0.013003 |
| UA | 0.011935 |
| WN | 0.011048 |
| F9 | 0.007593 |
| VX | 0.006042 |
| DL | 0.003585 |
| B6 | 0.001842 |
| AS | 0.000000 |
| HA | 0.000000 |
- 예제5: 아래는 운행거리의 요약통계량이다. 운행거리를 구간별로 [-np.inf,391,690,1199,np.inf]와 같이 나눈뒤 비행취소건수와 취소율을 구하여라.
df.DIST.describe()count 58492.000000
mean 872.900072
std 624.996805
min 67.000000
25% 391.000000
50% 690.000000
75% 1199.000000
max 4502.000000
Name: DIST, dtype: float64
_bin = [-np.inf,391,690,1199,np.inf]df.assign(DIST2 = pd.cut(df.DIST,_bin)).groupby('DIST2').agg('mean')| MONTH | DAY | WEEKDAY | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DIST2 | |||||||||||
| (-inf, 391.0] | 6.120488 | 15.719539 | 3.901425 | 1450.911398 | 9.414477 | 44.863971 | 259.463094 | 1552.313161 | 6.320828 | 0.000814 | 0.022659 |
| (391.0, 690.0] | 6.215019 | 15.736686 | 3.929315 | 1422.562246 | 10.105660 | 79.233721 | 547.309611 | 1563.083018 | 5.673505 | 0.002549 | 0.013503 |
| (690.0, 1199.0] | 6.244324 | 15.643155 | 3.934032 | 1355.024318 | 12.076761 | 121.376579 | 909.723096 | 1573.918111 | 6.603278 | 0.002553 | 0.013637 |
| (1199.0, inf] | 6.304648 | 15.710334 | 3.943070 | 1322.569298 | 12.077681 | 219.595209 | 1794.080413 | 1507.135670 | 4.616760 | 0.003484 | 0.010313 |
Quiz
df = pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/HRDataset_v14.csv')
df| Employee_Name | EmpID | MarriedID | MaritalStatusID | GenderID | EmpStatusID | DeptID | PerfScoreID | FromDiversityJobFairID | Salary | ... | ManagerName | ManagerID | RecruitmentSource | PerformanceScore | EngagementSurvey | EmpSatisfaction | SpecialProjectsCount | LastPerformanceReview_Date | DaysLateLast30 | Absences | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Adinolfi, Wilson K | 10026 | 0 | 0 | 1 | 1 | 5 | 4 | 0 | 62506 | ... | Michael Albert | 22.0 | Exceeds | 4.60 | 5 | 0 | 1/17/2019 | 0 | 1 | |
| 1 | Ait Sidi, Karthikeyan | 10084 | 1 | 1 | 1 | 5 | 3 | 3 | 0 | 104437 | ... | Simon Roup | 4.0 | Indeed | Fully Meets | 4.96 | 3 | 6 | 2/24/2016 | 0 | 17 |
| 2 | Akinkuolie, Sarah | 10196 | 1 | 1 | 0 | 5 | 5 | 3 | 0 | 64955 | ... | Kissy Sullivan | 20.0 | Fully Meets | 3.02 | 3 | 0 | 5/15/2012 | 0 | 3 | |
| 3 | Alagbe,Trina | 10088 | 1 | 1 | 0 | 1 | 5 | 3 | 0 | 64991 | ... | Elijiah Gray | 16.0 | Indeed | Fully Meets | 4.84 | 5 | 0 | 1/3/2019 | 0 | 15 |
| 4 | Anderson, Carol | 10069 | 0 | 2 | 0 | 5 | 5 | 3 | 0 | 50825 | ... | Webster Butler | 39.0 | Google Search | Fully Meets | 5.00 | 4 | 0 | 2/1/2016 | 0 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 306 | Woodson, Jason | 10135 | 0 | 0 | 1 | 1 | 5 | 3 | 0 | 65893 | ... | Kissy Sullivan | 20.0 | Fully Meets | 4.07 | 4 | 0 | 2/28/2019 | 0 | 13 | |
| 307 | Ybarra, Catherine | 10301 | 0 | 0 | 0 | 5 | 5 | 1 | 0 | 48513 | ... | Brannon Miller | 12.0 | Google Search | PIP | 3.20 | 2 | 0 | 9/2/2015 | 5 | 4 |
| 308 | Zamora, Jennifer | 10010 | 0 | 0 | 0 | 1 | 3 | 4 | 0 | 220450 | ... | Janet King | 2.0 | Employee Referral | Exceeds | 4.60 | 5 | 6 | 2/21/2019 | 0 | 16 |
| 309 | Zhou, Julia | 10043 | 0 | 0 | 0 | 1 | 3 | 3 | 0 | 89292 | ... | Simon Roup | 4.0 | Employee Referral | Fully Meets | 5.00 | 3 | 5 | 2/1/2019 | 0 | 11 |
| 310 | Zima, Colleen | 10271 | 0 | 4 | 0 | 1 | 5 | 3 | 0 | 45046 | ... | David Stanley | 14.0 | Fully Meets | 4.50 | 5 | 0 | 1/30/2019 | 0 | 2 |
311 rows × 36 columns
1
df.groupby(by='RaceDesc').agg({'EmpID':'count'})| EmpID | |
|---|---|
| RaceDesc | |
| American Indian or Alaska Native | 3 |
| Asian | 29 |
| Black or African American | 80 |
| Hispanic | 1 |
| Two or more races | 11 |
| White | 187 |
2
’RaceDesc==White’의 성별(Sex)임금차이는 2000이상이다.
df.groupby(by=['RaceDesc','Sex']).agg({'Salary':'mean'})| Salary | ||
|---|---|---|
| RaceDesc | Sex | |
| American Indian or Alaska Native | F | 63436.500000 |
| M | 70545.000000 | |
| Asian | F | 67520.117647 |
| M | 69939.416667 | |
| Black or African American | F | 66963.829787 |
| M | 85066.121212 | |
| Hispanic | M | 83667.000000 |
| Two or more races | F | 58068.500000 |
| M | 62313.800000 | |
| White | F | 68846.519231 |
| M | 65334.132530 |
3
퇴직한사람(Termd==1)은 모두 104명이며 백인여성의 퇴직자수가 가장 많다.
(df.Termd==1).sum()104
df.groupby(['RaceDesc','Sex']).agg({'Termd':'sum'})| Termd | ||
|---|---|---|
| RaceDesc | Sex | |
| American Indian or Alaska Native | F | 0 |
| M | 0 | |
| Asian | F | 6 |
| M | 3 | |
| Black or African American | F | 15 |
| M | 14 | |
| Hispanic | M | 0 |
| Two or more races | F | 2 |
| M | 1 | |
| White | F | 37 |
| M | 26 |
4
퇴직한사람중 아시아인의 비율은 10%가 넘지 않는다.
df.groupby(by=['RaceDesc']).agg({'Termd':'sum'})| Termd | |
|---|---|
| RaceDesc | |
| American Indian or Alaska Native | 0 |
| Asian | 9 |
| Black or African American | 29 |
| Hispanic | 0 |
| Two or more races | 3 |
| White | 63 |
9/1040.08653846153846154
5
[성별(Sex), 결혼유무(MarriedID)] 별 연봉(Salary)의 평균을 조사하라. 어떠한 그룹이 가장 평균연봉이 적은가?
df.groupby(['Sex','MarriedID']).agg({'Salary':'mean'})| Salary | ||
|---|---|---|
| Sex | MarriedID | |
| F | 0 | 66504.394231 |
| 1 | 69638.986111 | |
| M | 0 | 70967.939759 |
| 1 | 70089.038462 |
6
[성별(Sex), 인종별(RaceDesc)] 별 연봉(Salary)의 중앙값(‘median’)을 조사하라. 연봉의 중앙값이 가장 높은 그룹은 무엇인가?
df.groupby(['Sex','RaceDesc']).agg({'Salary':'median'})| Salary | ||
|---|---|---|
| Sex | RaceDesc | |
| F | American Indian or Alaska Native | 63436.5 |
| Asian | 63676.0 | |
| Black or African American | 61584.0 | |
| Two or more races | 57837.0 | |
| White | 62405.0 | |
| M | American Indian or Alaska Native | 70545.0 |
| Asian | 64731.0 | |
| Black or African American | 71339.0 | |
| Hispanic | 83667.0 | |
| Two or more races | 61568.0 | |
| White | 61809.0 |
7
[성별(Sex), 인종별(RaceDesc)] 별 연봉(Salary)의 중앙값(‘median’)과 근무인원수를 함께 조사하라. 연봉의 중앙값이 가장 높은 그룹이 혜택을 받는 그룹이라고 느껴지는가?
df.groupby(['Sex','RaceDesc']).agg({'Salary':['median','count']})| Salary | |||
|---|---|---|---|
| median | count | ||
| Sex | RaceDesc | ||
| F | American Indian or Alaska Native | 63436.5 | 2 |
| Asian | 63676.0 | 17 | |
| Black or African American | 61584.0 | 47 | |
| Two or more races | 57837.0 | 6 | |
| White | 62405.0 | 104 | |
| M | American Indian or Alaska Native | 70545.0 | 1 |
| Asian | 64731.0 | 12 | |
| Black or African American | 71339.0 | 33 | |
| Hispanic | 83667.0 | 1 | |
| Two or more races | 61568.0 | 5 | |
| White | 61809.0 | 83 | |